start_time <- Sys.time()
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] "F"
##
## $resolution
## [1] "0.001"
##
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.001__perplexity-30__nCores-4"
##
## $nCores
## [1] "4"
##
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
library(enrichR) #BiocManager::install("enrichR")
library(monocle) # BiocManager::install("monocle")
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
#
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
allGenes <- F
test.use <- "wilcox"
}else{
allGenes <- T
# MAST not install on Minerva...
test.use <- "wilcox"}
allGenes ## [1] TRUE
CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){
# sum(colnames(mDAT) != colnames( mDAT@reducedDimS))
## Manually save reduced dimensions
if(export_PC==T){
mDAT$PC1 <- mDAT@normalized_data_projection[,1]
mDAT$PC2 <- mDAT@normalized_data_projection[,2]
mDAT$PC3 <- mDAT@normalized_data_projection[,3]
}
if(export_UMAP==T){
mDAT$UMAP1 <- mDAT@reducedDimA[1,]
mDAT$UMAP2 <- mDAT@reducedDimA[2,]
mDAT$UMAP3 <- mDAT@reducedDimA[3,]
}
DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
DAT@scale.data <- DAT@data #Data was already scaled in Monocle
# DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)## Scaling data matrix
head(DAT@meta.data) ## nGene nUMI orig.ident singlet.or.not.binary HTO
## GTCATTTTCCGCATCT 2035 6729 RAJ_13357 1 NYUMD0011
## ATAAGAGAGGAGTTGC 1914 6718 RAJ_13357 1 BID0076
## CATTATCCATGGTCTA 1971 6156 RAJ_13357 1 MSMD0067
## CTTAGGAGTGGCGAAT 1893 6392 RAJ_13357 1 NYUMD0011
## CATCAGAAGCACCGCT 1967 6110 RAJ_13357 1 MSMD0067
## GCGGGTTAGTAGCCGA 1868 5977 RAJ_13357 1 NYUMD0011
## ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT MSMD0207 0.04191439 0 1 0 0
## ATAAGAGAGGAGTTGC BIMD0076 0.04362066 0 1 0 0
## CATTATCCATGGTCTA NYUMD0015 0.03086921 0 1 0 0
## CTTAGGAGTGGCGAAT MSMD0207 0.03613892 0 1 0 0
## CATCAGAAGCACCGCT NYUMD0015 0.04437531 19 10 8 0
## GCGGGTTAGTAGCCGA MSMD0207 0.03514056 6 2 0 0
## barcode dx mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT PD PD White M 71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC PD GBA White M 47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control White M 51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT PD PD White M 71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control White M 51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA PD PD White M 71
## Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT 5.117674 1 1 4
## ATAAGAGAGGAGTTGC 5.109379 1 1 4
## CATTATCCATGGTCTA 4.661479 1 1 1
## CTTAGGAGTGGCGAAT 4.844786 1 1 1
## CATCAGAAGCACCGCT 4.610054 1 1 6
## GCGGGTTAGTAGCCGA 4.508861 1 1 6
## cell_type cluster_ext_type cluster_ext_type_filt PC1
## GTCATTTTCCGCATCT Monocytes Monocytes Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes Monocytes Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes Monocytes Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes Monocytes Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes Monocytes Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes Monocytes Monocytes 17.77910
## PC2 PC3 UMAP1 UMAP2 UMAP3
## GTCATTTTCCGCATCT 0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC 1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT 4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA 2.61114660 -7.713996 1.303913 1.455874 1.130939
make_markerDF <- function(DAT, markerList){
markerData <- DAT@data[row.names(DAT@data) %in% markerList,] %>% t() %>%
as.matrix() %>% data.table(keep.rownames = T, key = "rn")
markerData[markerData$markerList[1]==0,] <- NA
markerData[markerData$markerList[2]==0,] <- NA
colnames(markerData) <- c("Cell",paste("Gene", rep(1:length(markerList)), sep=""))
return(markerData)
}
gene_gene_plot <- function(DAT, markerList, colorby, title="", plot=T, legend=T, filterZeros=T){
## Efficiently merge using data.table
markerData <- make_markerDF(DAT, markerList)
dt1 <- data.table(markerData, keep.rownames = "Cell", key = "Cell") %>% copy()
dt2 <- data.table(DAT@meta.data, keep.rownames = "Cell", key = "Cell") %>% copy()
row.names(dt2) <- dt2$Cell
markerDT <- dt1[dt2]
if(filterZeros==T){markerDT <- subset(markerDT, Gene1!=0 & Gene2!=0)}
p <- ggplot(data = markerDT, aes(x=Gene1, y=Gene2 )) +
stat_density_2d(aes(fill = ..level..),
geom = "polygon", colour="purple", bins = 10, size=.1) +
geom_point(aes(color=as.factor(eval(parse(text=colorby)))), shape=16, stroke=0, size=2, alpha=.5) +
guides(colour = guide_legend(title=colorby, override.aes = list(alpha = 1))) +
scale_color_manual(values = pretty_colors(mDAT, var = colorby)) +
labs(x=markerList[1], y=markerList[2], title=title) +
geom_smooth(method="lm")
if(legend==F){p <- p + theme(legend.position = "none")}
if(plot==T){print(p)}
return(p)
} p <- gene_gene_plot(DAT, c("CD14", "FCGR3A"), colorby="cluster_ext_type_filt",
title="Monocyte Subtype Markers")# Identify HLA gene names
d <- DAT@data@Dimnames[[1]]
HLA_genes <- d[startsWith(d, "HLA-DR")]
# Plot
HLA_1 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[1]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
HLA_2 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[2]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
HLA_3 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[3]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
plot_grid(HLA_1, HLA_2, HLA_3)Remove filters to get all DEGs.
clustDAT <- SubsetData(DAT, subset.name = "Cluster", accept.value = c(1,2), do.scale = F)
DEGs_monocytes <- runDGE(DAT, meta_var = "Cluster", group1 = 1, group2 = 2,
allGenes = allGenes, test.use = test.use)## Warning: Removed 1 rows containing missing values (geom_point).
# DEGs_monocytes <- read.csv("Results/MonocyteSubtype.markers.csv", row.names = 1)
createDT(DEGs_monocytes, caption="DEGs between cluster 1 (CD16-- monocytes) and cluster 2 (CD16+ monocytes")## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
write.csv(DEGs_monocytes, file.path(resultsPath, "MonocyteSubtype.markers.csv"), quote = F, row.names = T)identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F, test.use="wilcox"){
DAT <- SetIdent(DAT, ident.use = DAT@meta.data$Cluster)
if(allGenes==F){
clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25,
only.pos = F, test.use = test.use)
clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25,
only.pos = F, test.use = test.use)
}else{
clustA.markers <- FindMarkers(DAT, ident.1=clusterA,
only.pos = F, test.use = test.use,
logfc.threshold = -Inf, min.pct = -Inf,
min.cells.group = -Inf,min.cells.gene = -Inf,
min.diff.pct = -Inf)
clustB.markers <- FindMarkers(DAT, ident.1=clusterB,
only.pos = F, test.use = test.use,
logfc.threshold = -Inf, min.pct = -Inf,
min.cells.group = -Inf,min.cells.gene = -Inf,
min.diff.pct = -Inf)
}
clustA.markers <- cbind(Gene=row.names(clustA.markers), clustA.markers)
clustB.markers <- cbind(Gene=row.names(clustB.markers), clustB.markers)
clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
subset(p_val_adj<=0.05) %>% mutate(Cluster=clusterA)
clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
subset(p_val_adj<=0.05) %>% mutate(Cluster=clusterB)
uniqueMarkers <- rbind(clustA.uniqueMarkers, clustB.uniqueMarkers)
row.names(uniqueMarkers) <- uniqueMarkers$Gene
# difference <- abs( length(row.names(clustA.uniqueMarkers)) -
# length(row.names(clustB.uniqueMarkers) ) )
# uniqueMarkers <- data.frame(ClusterA_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
# ClusterB_markers=row.names(clustB.uniqueMarkers))
write.csv(uniqueMarkers,
file.path(resultsPath,"unique_cluster_markers.csv"),
quote = F, row.names = F)
createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 1 and Cluster 2")
return(uniqueMarkers)
}
# MUST use full dataset w/ all clusters (DAT)
uniqueMarkers <- identify_unique_markers(DAT, clusterA = 1, clusterB = 2,
allGenes=T, test.use = test.use) # clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control",
allGenes = F, test.use = test.use) # clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA",
allGenes = F, test.use = test.use)## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control",
clusterList = c(1,2), allGenes = F, test.use = test.use)[1] “Not showing table”
[1] “Not showing table”
DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA",
clusterList = c(1,2), allGenes = F, test.use = test.use)[1] “Not showing table”
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
[1] “Not showing table”
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
geneList <- data.frame(Gene=row.names(DEGs_monocytes),
Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))
results <- enrichr(genes = geneList, databases = enrichr_dbs ) Uploading data to Enrichr…
## Error in curl::curl_fetch_memory(url, handle = handle): Failed to connect to amp.pharm.mssm.edu port 80: No route to host
for (db in enrichr_dbs){
cat('\n')
cat("##",db,"\n")
# res <- subset(results[[db]], Adjusted.P.value<=0.05)
createDT_html(results[[db]], paste("Enrichr Results:",db))
cat('\n')
} ## Error in crosstalk::is.SharedData(data): object 'results' not found
save.image(file.path(resultsPath, "scRNAseq_results.RData"))
end_time <- Sys.time()
end_time - start_time## Time difference of 4.521736 hours
cat("\n\n")sessionInfo()## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] splines stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 garnett_0.1.4 monocle_2.10.1
## [4] DDRTree_0.1.5 irlba_2.3.3 VGAM_1.1-1
## [7] Biobase_2.42.0 BiocGenerics_0.28.0 enrichR_1.0
## [10] GeneOverlap_1.18.0 plotly_4.7.1 ggrepel_0.8.0
## [13] reshape2_1.4.3 readxl_1.1.0 data.table_1.11.8
## [16] dplyr_0.7.6 Seurat_2.3.4 Matrix_1.2-14
## [19] cowplot_0.9.3 ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.2 Hmisc_4.1-1
## [4] plyr_1.8.4 igraph_1.2.4 lazyeval_0.2.1
## [7] crosstalk_1.0.0 densityClust_0.3 fastICA_1.2-1
## [10] digest_0.6.18 foreach_1.4.4 htmltools_0.3.6
## [13] viridis_0.5.1 lars_1.2 gdata_2.18.0
## [16] magrittr_1.5 checkmate_1.8.5 cluster_2.0.7-1
## [19] mixtools_1.1.0 ROCR_1.0-7 limma_3.38.2
## [22] matrixStats_0.53.1 R.utils_2.7.0 docopt_0.6.1
## [25] colorspace_1.3-2 sparsesvd_0.1-4 crayon_1.3.4
## [28] jsonlite_1.5 bindr_0.1.1 survival_2.42-6
## [31] zoo_1.8-3 iterators_1.0.10 ape_5.1
## [34] glue_1.3.0 gtable_0.2.0 kernlab_0.9-26
## [37] prabclus_2.2-6 DEoptimR_1.0-8 scales_1.0.0
## [40] pheatmap_1.0.10 mvtnorm_1.0-8 Rcpp_1.0.0
## [43] metap_0.9 dtw_1.20-1 xtable_1.8-2
## [46] viridisLite_0.3.0 htmlTable_1.12 reticulate_1.10
## [49] foreign_0.8-70 bit_1.1-14 proxy_0.4-22
## [52] mclust_5.4.1 SDMTools_1.1-221 Formula_1.2-3
## [55] DT_0.4 tsne_0.1-3 htmlwidgets_1.2
## [58] httr_1.3.1 FNN_1.1 gplots_3.0.1
## [61] RColorBrewer_1.1-2 fpc_2.1-11 acepack_1.4.1
## [64] modeltools_0.2-22 ica_1.0-2 pkgconfig_2.0.2
## [67] R.methodsS3_1.7.1 flexmix_2.3-14 nnet_7.3-12
## [70] later_0.7.3 labeling_0.3 tidyselect_0.2.4
## [73] rlang_0.3.1 cellranger_1.1.0 munsell_0.5.0
## [76] tools_3.5.1 ggridges_0.5.0 evaluate_0.11
## [79] stringr_1.4.0 yaml_2.1.19 knitr_1.20
## [82] bit64_0.9-7 fitdistrplus_1.0-9 robustbase_0.93-1.1
## [85] caTools_1.17.1 purrr_0.2.5 RANN_2.6.1
## [88] pbapply_1.3-4 nlme_3.1-137 mime_0.5
## [91] slam_0.1-43 R.oo_1.22.0 hdf5r_1.0.0
## [94] compiler_3.5.1 rstudioapi_0.7 curl_3.2
## [97] png_0.1-7 tibble_2.0.1 stringi_1.3.1
## [100] lattice_0.20-35 trimcluster_0.1-2.1 HSMMSingleCell_1.2.0
## [103] pillar_1.3.1 combinat_0.0-8 lmtest_0.9-36
## [106] bitops_1.0-6 httpuv_1.4.5 R6_2.4.0
## [109] latticeExtra_0.6-28 promises_1.0.1 KernSmooth_2.23-15
## [112] gridExtra_2.3 codetools_0.2-15 MASS_7.3-50
## [115] gtools_3.8.1 assertthat_0.2.0 rjson_0.2.20
## [118] rprojroot_1.3-2 withr_2.1.2 qlcMatrix_0.9.7
## [121] diptest_0.75-7 doSNOW_1.0.16 grid_3.5.1
## [124] rpart_4.1-13 tidyr_0.8.1 class_7.3-14
## [127] rmarkdown_1.10 segmented_0.5-3.0 Rtsne_0.13
## [130] shiny_1.1.0 base64enc_0.1-3